Feedforward model with one hidden layer.
Trained using LOB and Liquidity measures together: Input dimension is 60 x 44 = 2640
Mean squared error used as loss function.
Optimizer: Adam
Training set consists of 202 days (~80%)
Validation set consists of 51 days (~20%)
Shown below:
- Network structure
- Settings used for training
- Plots of model performance
import os
import torch
from torchsummary import summary
from feedforward_three_layer import FFNN3
from IPython.display import HTML
def hide_code():
    """Return an HTML widget with a button that toggles notebook input cells on/off."""
    toggle_snippet = (
        "<script>code_show=true; function code_toggle() {if (code_show)"
        "{$('div.input').hide();} else {$('div.input').show();}"
        "code_show = !code_show} $( document ).ready(code_toggle);</script>"
        '<form action="javascript:code_toggle()">'
        '<input type="submit" value="Click here to toggle on/off the raw code."></form>'
    )
    return HTML(toggle_snippet)
model = FFNN3()
# Locate the checkpoint file in the working directory by substring match.
# The previous lookup (`x.count('ckpt')].index(True)`) only matched names
# where 'ckpt' occurred exactly once and raised a cryptic ValueError when
# no checkpoint was present; this version is robust and fails clearly.
try:
    ckpt_name = next(f for f in os.listdir() if 'ckpt' in f)
except StopIteration:
    raise FileNotFoundError("no checkpoint ('*ckpt*') file found in the working directory") from None
# Load on CPU so the notebook also runs on machines without a GPU.
ckpt = torch.load('./' + ckpt_name, map_location=torch.device('cpu'))
print(f'Input size: {60*44}')
# torchsummary needs a float32 model; input is a 60-minute window of 44 features.
summary(model.float(), input_size=(60, 44))
hide_code()
import pandas as pd
from configs.train_config import cfg

# Configuration entries to display, grouped by the config section they live in.
_sections = [
    ('USED IN TRAINING', cfg, ['STOCK', 'LOB', 'LIQ_VARS']),
    ('TRAIN', cfg.TRAIN, ['SHUFFLE', 'VAL_SHUFFLE', 'INTRADAY_SHUFFLE',
                          'SPLIT_RATIO', 'BATCH_SIZE', 'VAL_BATCH_SIZE']),
    ('MODEL', cfg.MODEL, ['BACKBONE', 'LOSS', 'DROPOUT_RATE', 'LEAKY_SLOPE']),
    ('OPTIMIZER', cfg.OPTIMIZER, ['LR', 'METHOD', 'LR_SCHEDULER', 'LAMBDA']),
]
configuration = pd.concat(
    [pd.DataFrame.from_dict(section, 'index').loc[keys]
     for _, section, keys in _sections],
    keys=[label for label, _, _ in _sections])
configuration.columns = ['CONFIGURATIONS']
print(f'Epoch loaded: {ckpt_name[5:-4]}')
configuration
import sys
sys.path.append('./../')
import numpy as np
from sklearn.linear_model import Ridge, LinearRegression

# Keep only the .npy data files (drops .DS_Store and the like). The previous
# version popped elements from `filenames` while enumerating the same list,
# which skips the element that follows every removal — two consecutive
# non-.npy entries would leave one behind. Building a filtered copy is safe.
filenames = sorted(f for f in os.listdir(cfg.DATA.DATA_PATH)
                   if f.split('.')[-1] == 'npy')
# Optionally restrict to the first PORTION share of the (sorted) days.
if cfg.DATA.PORTION is not None:
    filenames = filenames[:int(len(filenames) * cfg.DATA.PORTION)]
# Chronological split: the first SPLIT_RATIO share of days is used for
# training, the remainder for validation.
n_train = int(len(filenames) * cfg.TRAIN.SPLIT_RATIO)
train_datanames = filenames[:n_train]
val_datanames = filenames[n_train:]
# Load each day's samples: every .npy file stores a dict with 'X' (inputs)
# and 'y' (targets). One loop handles both the training and validation split.
X_t, y_t, X_v, y_v = [], [], [], []
for names, X_dst, y_dst in ((train_datanames, X_t, y_t),
                            (val_datanames, X_v, y_v)):
    for name in names:
        day = np.load(os.path.join(cfg.DATA.DATA_PATH, name),
                      allow_pickle='TRUE').item()
        X_dst.append(torch.from_numpy(day['X']))
        y_dst.append(torch.from_numpy(day['y']))
# Switch to double precision to match the stored weights, load them, and
# disable autograd for pure inference.
model.double()
model.load_state_dict(ckpt['model_state'], strict=False)
torch.set_grad_enabled(False)
model.eval()

def _predict_days(days):
    """Run the model over every sample of every day, one sample at a time."""
    preds = []
    for day in days:
        for sample in day:
            preds.append(model(sample.reshape(1, *sample.shape))[0])
    return preds

temp = _predict_days(X_t)
temp_v = _predict_days(X_v)
# Sanity check: concatenating and reshaping must not reorder any prediction.
for single, stacked in zip(temp, torch.cat(temp).reshape(-1, 5)):
    assert single.tolist() == stacked.tolist()
for single, stacked in zip(temp_v, torch.cat(temp_v).reshape(-1, 5)):
    assert single.tolist() == stacked.tolist()
y_t_pred = torch.cat(temp).reshape(-1, 5)
y_v_pred = torch.cat(temp_v).reshape(-1, 5)
y_t = torch.cat(y_t)
y_v = torch.cat(y_v)
# Per-sample squared and absolute-percentage errors, then the per-variable means.
se_train = (y_t_pred - y_t) ** 2
se_val = (y_v_pred - y_v) ** 2
ape_train = 100 * (1 - y_t_pred / y_t).abs()
ape_val = 100 * (1 - y_v_pred / y_v).abs()
mse_train = se_train.mean(dim=0).numpy()
mse_val = se_val.mean(dim=0).numpy()
mape_train = ape_train.mean(dim=0).numpy()
mape_val = ape_val.mean(dim=0).numpy()
##### ##### ##### ##### ##### Linear Regression ##### ##### ##### ##### #####
# Flatten each (60, 44) window into one row and prepend a bias column of ones.
# NOTE(review): LinearRegression fits an intercept by default, so the explicit
# ones column is redundant (it just receives a ~zero coefficient) — kept here
# for parity with previously saved results.
X_reg = torch.cat(X_t).flatten(1, 2)
X_reg = torch.cat([torch.ones(X_reg.shape[0], 1, dtype=torch.float64), X_reg], 1).numpy()
X_v_reg = torch.cat(X_v).flatten(1, 2)
X_v_reg = torch.cat([torch.ones(X_v_reg.shape[0], 1, dtype=torch.float64), X_v_reg], 1)
regr = LinearRegression()
# Fit on numpy arrays directly; the old `.tolist()` round-tripped the whole
# design matrix through Python lists, which is slow and memory-hungry.
regr.fit(X_reg, y_t.numpy())
y_t_regs = regr.predict(X_reg)
y_v_regs = regr.predict(X_v_reg)
# Same error metrics as for the network, for an apples-to-apples comparison.
se_train_reg = (y_t - y_t_regs) ** 2
se_val_reg = (y_v - y_v_regs) ** 2
ape_train_reg = 100 * (1 - y_t_regs / y_t).abs()
ape_val_reg = 100 * (1 - y_v_regs / y_v).abs()
reg_train_mses = se_train_reg.mean(dim=0).numpy()
reg_mses = se_val_reg.mean(dim=0).numpy()
reg_train_mapes = ape_train_reg.mean(dim=0).numpy()
reg_mapes = ape_val_reg.mean(dim=0).numpy()
##### ##### ##### ##### ##### Save Errors ##### ##### ##### ##### #####
# Persist per-sample errors so other notebooks can compare models.
model_errors = {'se_train': se_train.numpy(), 'se_val': se_val.numpy(),
                'ape_train': ape_train.numpy(), 'ape_val': ape_val.numpy()}
reg_errors = {'se_train': se_train_reg.numpy(), 'se_val': se_val_reg.numpy(),
              'ape_train': ape_train_reg.numpy(), 'ape_val': ape_val_reg.numpy()}
np.save('../errors/LOB+LIQ.npy', model_errors)
np.save('../errors/LinReg_LOB+LIQ.npy', reg_errors)
Below are five plots, one for each variable we wanted to predict. Each plot shows the model's training and prediction performance throughout the year, plotted day by day over each day's trading window — from the 61st trading minute until the last trading minute of the day.
These windows are ordered and stacked, starting from the first trading day's window until the last day's trading window.
On the x-axis we always have the minutes and on the y-axis the corresponding variable of the plot.
We also used a linear regression model as a baseline against which to compare our model's validation performance; its results appear in the plots below as well.
from CODES.utils.plotter import plotter

def _axis_args(idx, ylim, ylabel, title, style):
    """Build the plotter argument list for target variable ``idx``.

    ``style`` is a tuple ``(data_lw, data_alpha, pred_lw, val_pred_lw,
    reg_lw, reg_alpha)`` controlling line widths/alphas, which differ
    between the price plots (variables 0-2) and the variance plots (3-4).
    Previously the five axis specs were copy-pasted with only these
    values, the column index, and the labels differing.
    """
    data_lw, data_alpha, pred_lw, val_pred_lw, reg_lw, reg_alpha = style
    n_t, n_v = len(y_t), len(y_v)
    val_x = range(n_t, n_t + n_v)  # validation is plotted after the training window
    return [
        # Training targets and the model's fit on the training window.
        [[i[idx].detach().numpy() for i in y_t], '-',
         dict(color='lightskyblue', linewidth=data_lw, alpha=data_alpha, fillstyle='left')],
        [[i[idx].detach().numpy() for i in y_t_pred], '-',
         dict(color='orangered', linewidth=pred_lw, fillstyle='left')],
        # Validation targets and the model's predictions, shifted right.
        [val_x, [i[idx].detach().numpy() for i in y_v], '-',
         dict(color='silver', linewidth=data_lw, alpha=data_alpha, fillstyle='right')],
        [range(n_t, n_t + len(y_v_pred)), [i[idx].detach().numpy() for i in y_v_pred], '-',
         dict(color='black', linewidth=val_pred_lw, fillstyle='right', alpha=1)],
        # Linear-regression baseline: fit on training, prediction on validation.
        [range(n_t), y_t_regs[:, idx], '-',
         dict(color='forestgreen', linewidth=reg_lw, alpha=reg_alpha, fillstyle='left')],
        [val_x, y_v_regs[:, idx], '-',
         dict(color='gold', linewidth=0.1, alpha=reg_alpha, fillstyle='right')],
        # Error summary table for this variable.
        [dict(cellText=[[mse_train[idx], mape_train[idx]],
                        [mse_val[idx], mape_val[idx]],
                        [reg_mses[idx], reg_mapes[idx]]],
              rowLabels=['Training Error', 'Validation Error',
                         'Linear Regression Validation Error'],
              colLabels=['Mean Squared', 'Mean Absolute Percentage (%)'],
              loc='lower right'),
         {'row_scale': 2, 'col_scale': 0.5, 'fontsize': 16}],
        [-2000, 93000], ylim,
        ['Minutes', dict(fontsize=15)], [ylabel, dict(fontsize=15)], [title, dict(fontsize=20)],
        [dict(line_order=[[0, 2], [1, 3], [4, 5]],
              labels=('Training/Validation data', 'Learned/Predicted by Model',
                      'Learned/Predicted by Linear Regression'),
              ncol=1, shadow=1, labelspacing=0.2, fontsize=18, loc='upper left')],
        [dict(b=True, axis='y', alpha=0.5)],
    ]

# (data_lw, data_alpha, pred_lw, val_pred_lw, reg_lw, reg_alpha)
_PRICE_STYLE = (4, 1, 0.5, 0.3, 0.3, 1)
_VARIANCE_STYLE = (2, 0.5, 1, 1, 0.1, 0.5)

args = [
    _axis_args(0, [6.7, 11.3], 'Price in TL', 'Mid Price', _PRICE_STYLE),
    _axis_args(1, [6.7, 11.3], 'Price in TL', 'Bid Price Expectation', _PRICE_STYLE),
    _axis_args(2, [6.7, 11.3], 'Price in TL', 'Ask Price Expectation', _PRICE_STYLE),
    _axis_args(3, [-0.0002, 0.00045], 'Variance in TL$^{2}$', 'Bid Price Variance', _VARIANCE_STYLE),
    _axis_args(4, [-0.0003, 0.00055], 'Variance in TL$^{2}$', 'Ask Price Variance', _VARIANCE_STYLE),
]
# One attribute name per entry in each axis spec, in the same order.
attrs = ['plot', 'plot', 'plot', 'plot', 'plot', 'plot',
         'make_table',
         'set_xlim', 'set_ylim',
         'set_xlabel', 'set_ylabel', 'set_title',
         'legend', 'grid']
plotter(args, attrs,
        fig_title=f'Converged Result of Model\n Stock: GARAN\n Year: 2017\n Epoch:{ckpt_name[5:-4]} ',
        dpi=600, ncols=1, xpad=5)  # pass save_path=os.getcwd() to also save the figure